# text_packages_rep.R
# Replication archive version - embeds scholr functions locally
# (Original text_packages.R uses the scholr package from github.com/owasow/scholr)

## ---- load_packages, include = FALSE ----

# data processing
library(car)


# multinomial
library(nnet)
library(mgcv)

#library(latex2exp)
library(ggtext)



# tidyverse (loads dplyr, ggplot2, tidyr, purrr, stringr, forcats, lubridate, etc.)
library(tidyverse)
library(here)
library(janitor)
library(scales)
library(glue)
library(broom)

# tidy eval
library(rlang)
# devtools::install_github("milesmcbain/friendlyeval")
#library(friendlyeval)

# tables and assessing models
library(stargazer)
options(kableExtra.latex.load_packages = FALSE)
library(kableExtra)
library(xtable)
library(margins)

# importing
library(readxl)
library(haven)

# text analysis
library(tidytext)
library(topicmodels)
library(tm)
library(SnowballC)
library(sentimentr)

# data visualization
library(sjPlot)
library(gridExtra)
library(ggridges)
library(ggmosaic)
library(jtools)
library(interactions)
library(cowplot)
library(amerika)
library(patchwork)
library(grid)
library(gridtext)

# matching
library(cobalt)
library(CBPS)
library(MatchIt)

# modeling
library(pscl) # zero inflated poisson models
library(boot) # zero inflated poisson models
library(MASS)
library(DescTools) # PseudoR2
library(mediation)
#library(zeligverse)
library(marginaleffects)
library(ggeffects)  # part of the sjPlot ecosystem
library(effectsize) # Cohen's d


# ggplot text annotation and fonts
#library(latex2exp)
# Note: ggtext already loaded above


# correlations, PCA, randomForest and related methods
# for interpreting text vars
library(corrr)
library(GGally)
library(corrplot)
library(randomForest)
library(FactoMineR) # PCA
library(factoextra) # PCA


# missing data
library(naniar)
library(Amelia)
library(mice)

# misc
library(ANOVAreplication)
library(vtable)   # build variable table

library(rstudioapi)
library(sjlabelled)

# package management
# install from https://github.com/r-lib/conflicted
suppressMessages({
    library(conflicted)
    conflicted::conflicts_prefer(dplyr::filter)
    conflicted::conflicts_prefer(dplyr::select)
    conflicted::conflicts_prefer(dplyr::summarize)
    conflicted::conflicts_prefer(here::here)
    conflicted::conflicts_prefer(cowplot::plot_grid)
    conflicted::conflicts_prefer(nnet::multinom)
    conflicted::conflicts_prefer(ggplot2::annotate)
    conflicted::conflicts_prefer(ggplot2::margin)
    # Note: p() function defined locally below (was scholr::p)
})


# =============================================================================
# Local scholr functions (embedded for replication without package dependency)
# Original package: https://github.com/owasow/scholr
# =============================================================================

# --- Inline helpers for reporting model results ---
# Rounded coefficient estimate for one term, for inline reporting.
#   model  - fitted model object accepted by stats::coef()
#   var    - name (or index) of the coefficient to pull out
#   digits - decimal places handed to round()
# Returns the subset of coef(model) selected by `var`, rounded.
b <- function(model, var, digits = 2) {
    estimates <- stats::coef(model)
    round(estimates[var], digits = digits)
}

# Formatted p-value for one term, ready to paste after "p " in prose.
#   model  - fitted model whose summary() carries a `coefficients` matrix
#   var    - row name (or index) of the term in that matrix
#   digits - decimal places shown when the p-value is printed in full
# Returns "< .001" for p-values below 0.001, otherwise "= .xxx" with the
# leading zero stripped. When the summary reports a missing p-value (NA/NaN,
# e.g. a fit with no residual degrees of freedom) the result is "= NA" plus a
# warning, rather than an opaque "missing value where TRUE/FALSE needed" error.
# Stops if no p-value column can be located.
p <- function(model, var, digits = 3) {
    coef_table <- summary(model)$coefficients
    # "Pr\\(" covers headers such as "Pr(>|t|)"; "p.value" (regex dot) covers
    # both "p-value" and "p.value".
    p_col <- grep("Pr\\(|p.value", colnames(coef_table), value = TRUE)
    if (length(p_col) > 0) {
        p_value <- coef_table[var, p_col[1]]
    } else if (ncol(coef_table) >= 4) {
        # No recognisable header: fall back to the fourth column.
        p_value <- coef_table[var, 4]
    } else {
        stop("Cannot find p-value column in model summary")
    }
    if (length(p_value) == 1 && is.na(p_value)) {
        warning("p-value for '", var, "' is missing in the model summary", call. = FALSE)
        return("= NA")
    }
    if (p_value < 0.001) {
        return("< .001")
    }
    shown <- sprintf(paste0("%.", digits, "f"), p_value)
    paste0("= ", sub("^0", "", shown))
}

# Rounded standard error for one term.
#   model  - fitted model whose summary() has a `coefficients` matrix with a
#            "Std. Error" column
#   var    - row name (or index) of the term
#   digits - decimal places handed to round()
se <- function(model, var, digits = 2) {
    coef_table <- summary(model)$coefficients
    std_error <- coef_table[var, "Std. Error"]
    round(std_error, digits = digits)
}

# Rounded exponentiated coefficient for one term (an odds ratio when the
# coefficient is on the log-odds scale).
#   model  - fitted model object accepted by stats::coef()
#   var    - name (or index) of the coefficient
#   digits - decimal places handed to round()
or <- function(model, var, digits = 2) {
    estimate <- stats::coef(model)[var]
    round(exp(estimate), digits = digits)
}

# Rounded test statistic (z or t) for one term.
#   model  - fitted model whose summary() carries a `coefficients` matrix
#   var    - row name (or index) of the term
#   digits - decimal places handed to round()
# The statistic column is located by header: "z value", "t value", bare
# "z"/"t", or close variants such as "t.value" / "z-stat". If no header
# matches, the third column is used; with fewer than three columns the result
# is NA.
z <- function(model, var, digits = 2) {
    coef_table <- summary(model)$coefficients
    # The pattern is anchored on purpose: an unanchored "z|t" also matches
    # "Estimate" and "Std. Error", which made the estimate come back instead
    # of the statistic.
    stat_col <- grep(
        "^[zt]([ ._-]?(value|stat|statistic))?$",
        colnames(coef_table),
        value = TRUE
    )
    if (length(stat_col) > 0) {
        round(coef_table[var, stat_col[1]], digits)
    } else if (ncol(coef_table) >= 3) {
        round(coef_table[var, 3], digits)
    } else {
        NA
    }
}

# 95% confidence interval for one term, formatted as "[lower, upper]".
#   model  - fitted model object accepted by stats::confint()
#   var    - term passed to confint() as its `parm` argument
#   digits - decimal places for both bounds
#   exp    - if TRUE, both bounds are exponentiated before rounding
ci95 <- function(model, var, digits = 2, exp = FALSE) {
    interval <- stats::confint(model, var, level = 0.95)
    # The argument `exp` shadows the function, hence the explicit base:: call.
    if (exp) {
        interval <- base::exp(interval)
    }
    bounds <- round(interval[1:2], digits)
    paste0("[", bounds[1], ", ", bounds[2], "]")
}

# Inline "b = <estimate>, p <p-value>" string for one term, assembled from
# b() and p().
#   model    - fitted model passed through to b() and p()
#   var      - term to report
#   b_digits - rounding for the estimate
#   p_digits - decimal places for the p-value
bp <- function(model, var, b_digits = 2, p_digits = 3) {
    estimate <- b(model, var, b_digits)
    p_text <- p(model, var, p_digits)
    paste0("b = ", estimate, ", p ", p_text)
}

# Inline "OR = <exp(estimate)>, p <p-value>" string for one term, assembled
# from or() and p().
#   model     - fitted model passed through to or() and p()
#   var       - term to report
#   or_digits - rounding for the exponentiated estimate
#   p_digits  - decimal places for the p-value
orp <- function(model, var, or_digits = 2, p_digits = 3) {
    odds_ratio <- or(model, var, or_digits)
    p_text <- p(model, var, p_digits)
    paste0("OR = ", odds_ratio, ", p ", p_text)
}

# --- Utility functions ---
# Format values with "," as the thousands separator, without scientific
# notation and without padding. Extra arguments are forwarded to format().
add_comma <- function(x, ...) {
    format(x, ..., trim = TRUE, scientific = FALSE, big.mark = ",")
}
# Spell out whole numbers from 1 to 10 ("one" ... "ten"); every other value is
# returned as its as.character() form.
#   x - numeric vector (non-numeric input is simply converted to character)
# Returns a character vector the same length as `x`, keeping its names.
# Each element is handled independently, so zeros, negatives, NAs and
# non-integers in a vector no longer shift or break the lookup for the
# remaining elements (indexing `words[x]` directly drops 0s, errors on mixed
# signs and truncates 2.5 to "two").
number_to_word <- function(x) {
    words <- c("one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten")
    if (!is.numeric(x)) {
        return(as.character(x))
    }
    out <- as.character(x)
    spell <- !is.na(x) & x >= 1 & x <= 10 & x == floor(x)
    out[spell] <- words[x[spell]]
    names(out) <- names(x)
    out
}
# Coerce to numeric and round to one decimal place.
round1 <- function(x) {
    round(as.numeric(x), digits = 1)
}
# Coerce to numeric and round to two decimal places.
round2 <- function(x) {
    round(as.numeric(x), digits = 2)
}
# Convert to character and show missing entries (real NA or the literal
# string "NA") as "-".
na_to_dash <- function(x) {
    chr <- as.character(x)
    is_missing <- is.na(chr) | chr == "NA"
    ifelse(is_missing, "-", chr)
}
# Convert to character and show missing entries (real NA or the literal
# string "NA") as an empty string.
na_to_blank <- function(x) {
    chr <- as.character(x)
    is_missing <- is.na(chr) | chr == "NA"
    ifelse(is_missing, "", chr)
}
# Negated %in%: TRUE for each element of `x` that has no match in `table`.
`%nin%` <- function(x, table) {
    match(x, table, nomatch = 0L) == 0L
}
# Map p-values to LaTeX significance labels:
#   x < 0.001 -> "$p < 0.001$",  x < 0.01 -> "$p < 0.01$",
#   x < 0.05  -> "$p < 0.05$",   otherwise  "$p > 0.05$".
#   x - vector of p-values
# Returns a character vector the same length as `x`. Missing p-values stay
# missing (NA_character_) instead of being reported as "$p > 0.05$".
pval <- function(x) {
    labels <- c("$p < 0.001$", "$p < 0.01$", "$p < 0.05$", "$p > 0.05$")
    # findInterval() counts how many cut points are <= x (0 to 3), which is
    # exactly the position of the first "x < cut" test that succeeds, less one.
    labels[findInterval(x, c(0.001, 0.01, 0.05)) + 1L]
}
# Express a coefficient as a percentage change, (exp(b) - 1) * 100, formatted
# in fixed notation.
#   model  - fitted model object accepted by stats::coef()
#   coef   - index (or name) of the coefficient; defaults to the second one
#   digits - decimal places in the formatted string
format_exp <- function(model, coef = 2, digits = 1) {
    # The argument `coef` shadows the generic, hence the explicit stats:: call.
    estimate <- stats::coef(model)[coef]
    pct_change <- (exp(estimate) - 1) * 100
    formatC(pct_change, format = "f", digits = digits)
}

# --- Format detection ---
# Output type for stargazer: "latex" or "html" according to knitr's current
# output format, "text" otherwise.
get_star_format <- function() {
    if (knitr::is_latex_output()) {
        "latex"
    } else if (knitr::is_html_output()) {
        "html"
    } else {
        "text"
    }
}
# Output format for kable: "latex" or "html" according to knitr's current
# output format, "markdown" otherwise.
get_kable_format <- function() {
    if (knitr::is_latex_output()) {
        "latex"
    } else if (knitr::is_html_output()) {
        "html"
    } else {
        "markdown"
    }
}
# Output type for xtable: "latex" when knitr reports LaTeX output, "html"
# otherwise.
get_xtable_format <- function() {
    if (knitr::is_latex_output()) "latex" else "html"
}

# --- Label conversion ---
# Private store for user-registered label mappings. It has an empty parent,
# and every lookup below uses inherits = FALSE, so an unrelated object named
# `custom_mappings` elsewhere (e.g. in the workspace) is never mistaken for
# registered mappings.
.scholr_env <- new.env(parent = emptyenv())

# Register label mappings.
#   mappings - the mappings to store (convert_labels() reads the names as
#              regex patterns and the values as labels)
#   append   - if TRUE, previously registered mappings are kept and placed
#              AFTER the new ones; if FALSE, they are replaced
# Returns the full stored set, invisibly.
set_label_mappings <- function(mappings, append = TRUE) {
    if (append && exists("custom_mappings", envir = .scholr_env, inherits = FALSE)) {
        mappings <- c(mappings, get("custom_mappings", envir = .scholr_env, inherits = FALSE))
    }
    assign("custom_mappings", mappings, envir = .scholr_env)
    invisible(mappings)
}

# Return the registered mappings, or NULL when none have been set.
get_label_mappings <- function() {
    if (exists("custom_mappings", envir = .scholr_env, inherits = FALSE)) {
        get("custom_mappings", envir = .scholr_env, inherits = FALSE)
    } else {
        NULL
    }
}

# Forget all registered mappings. Returns NULL invisibly.
clear_label_mappings <- function() {
    if (exists("custom_mappings", envir = .scholr_env, inherits = FALSE)) {
        rm("custom_mappings", envir = .scholr_env)
    }
    invisible(NULL)
}

# Strip a wave marker (16, 20 or 24) from variable names. Three patterns are
# applied in sequence, each removing at most one match per string:
#   1. "_<wave>" directly before a "_fct" / "_bin" / "_int" / "_ihs" / "_z" suffix
#   2. a trailing "_<wave>"
#   3. a trailing "<wave>" that directly follows a non-digit character
.norm_strip_wave <- function(x) {
    wave_patterns <- c(
        "(?:_(16|20|24))(?=_(fct|bin|int|ihs|z)\\b)",
        "(?:_(16|20|24))$",
        "(?<=\\D)(16|20|24)$"
    )
    for (pattern in wave_patterns) {
        x <- stringr::str_replace(x, pattern, "")
    }
    x
}

# Translate model term names into display labels.
#   model        - a fitted model, or (when extracted = TRUE) a character
#                  vector of term names
#   extracted    - TRUE if `model` already holds the term names; otherwise
#                  they are read from broom::tidy(model)$term
#   use_defaults - whether to pass the result through
#                  .apply_default_mappings() afterwards
# Mappings from get_label_mappings() are tried in order; each name is used as
# a regex against both the raw term and its wave-stripped form, and the first
# mapping that matches a term claims it.
convert_labels <- function(model, extracted = FALSE, use_defaults = TRUE) {
    terms <- if (extracted) model else broom::tidy(model)$term
    terms_norm <- .norm_strip_wave(terms)
    labelled <- terms
    claimed <- rep(FALSE, length(terms))
    custom <- get_label_mappings()
    # seq_along(NULL) is empty, so with nothing registered the loop is skipped.
    for (i in seq_along(custom)) {
        pattern <- names(custom)[i]
        hit_raw <- stringr::str_detect(terms, pattern)
        hit_norm <- stringr::str_detect(terms_norm, pattern)
        hits <- (hit_raw | hit_norm) & !claimed
        if (!any(hits)) next
        labelled[hits] <- custom[i]
        claimed[hits] <- TRUE
    }
    if (!use_defaults) {
        return(labelled)
    }
    .apply_default_mappings(terms, labelled, terms_norm)
}

# Apply the built-in label rules to terms that still have no custom label.
#   labs      - original term names
#   labs2     - labels so far; an entry that differs from `labs` is treated as
#               already labelled and kept as is
#   labs_norm - `labs` with wave markers stripped; most rules match on this
# Rules are checked top to bottom and the first match wins; terms matching no
# rule keep their original name.
.apply_default_mappings <- function(labs, labs2, labs_norm) {
    norm_has <- function(pattern) stringr::str_detect(labs_norm, pattern)
    dplyr::case_when(
        labs2 != labs           ~ labs2,
        labs == "(Intercept)"   ~ "(Intercept)",
        norm_has("^age$")       ~ "Age",
        # "^female" is listed before "^male"; both are anchored at the start.
        norm_has("^female")     ~ "Female",
        norm_has("^male")       ~ "Male",
        norm_has("^educ|^education") ~ "Education",
        norm_has("^income")     ~ "Income",
        norm_has("^married")    ~ "Married",
        norm_has("^employed")   ~ "Employed",
        norm_has("race.*[bB]lack")    ~ "Race: Black",
        norm_has("race.*[hH]ispanic") ~ "Race: Hispanic",
        norm_has("race.*[wW]hite")    ~ "Race: White",
        norm_has("race.*[oO]ther")    ~ "Race: Other",
        norm_has("^pid7")       ~ "Party ID (7-point)",
        norm_has("^pid3")       ~ "Party ID (3-cat)",
        norm_has("^ideo7")      ~ "Ideology (7-point)",
        # Interaction terms (containing ":") are passed through untouched.
        stringr::str_detect(labs, ":") ~ labs,
        TRUE                    ~ labs
    )
}

# --- Stargazer helpers ---
# Default significance cut-offs for star0(): a single star level at 0.05.
star_cut_vector <- c(0.05, NA, NA)

# stargazer() wrapper with fixed house settings: no header, aligned columns,
# "scriptsize" font and a single replacement note, "*$p<0.05$".
#   ...          - models and further arguments forwarded to stargazer()
#   type         - output type; when NULL it is taken from get_star_format()
#   digits       - decimal places
#   star.cutoffs - significance cut-offs (defaults to star_cut_vector)
# Returns whatever stargazer::stargazer() returns.
star0 <- function(..., type = NULL, digits = 3, star.cutoffs = star_cut_vector) {
    output_type <- if (is.null(type)) get_star_format() else type
    stargazer::stargazer(
        ...,
        digits = digits,
        header = FALSE,
        type = output_type,
        align = TRUE,
        font.size = "scriptsize",
        star.cutoffs = star.cutoffs,
        notes.append = FALSE,
        notes = "*$p<0.05$"
    )
}

# Extract display labels for the variables in a stargazer table.
#   ...  - models and further arguments forwarded to stargazer()
#   omit - passed to stargazer()'s `omit` argument
# The table is rendered as text; everything from the first line starting with
# "Constant" onwards is dropped, and the first cell of each remaining line
# that begins with a letter is taken as a variable name. The names are then
# passed through convert_labels().
# NOTE(review): when the table has no "Constant" row nothing is trimmed, so
# any later lines that begin with a letter are picked up as well - confirm
# callers always include an intercept.
star_var <- function(..., omit = NULL) {
    table_lines <- utils::capture.output(
        stargazer::stargazer(..., type = "text", omit = omit)
    )
    constant_at <- which(stringr::str_detect(table_lines, "^Constant"))
    if (length(constant_at) > 0) {
        # seq_len() gives an empty index when the match is on line 1, where
        # 1:(n - 1) would be c(1, 0) and keep that very line.
        table_lines <- table_lines[seq_len(constant_at[1] - 1)]
    }
    variable_lines <- grep("^[[:alpha:]]", table_lines, value = TRUE)
    # Cells are separated by runs of two or more spaces; keep the first cell.
    variable_names <- vapply(
        variable_lines,
        function(line) strsplit(line, "  +")[[1]][1],
        character(1),
        USE.NAMES = FALSE
    )
    convert_labels(variable_names, extracted = TRUE)
}

# --- TeXcount ---
# Count words in a TeX file by running the external `texcount` program.
#   file            - path to the .tex file; when NULL it is derived from
#                     knitr::current_input() by replacing .Rmd/.rmd with .tex
#   include_bib     - if TRUE, texcount is also given -incbib
#   include_headers - if TRUE, words in headers are added to the total
# Returns a list with `text`, `headers`, `outside` (numbers parsed from the
# texcount output), `total` (text + outside, plus headers on request),
# `total_formatted` (total with thousands separators) and `raw` (the texcount
# output lines). When the file is missing or texcount is not installed, a
# warning is raised and the counts are NA, `raw` is NULL and `total_formatted`
# holds a bracketed placeholder message.
# Stops if `file` is NULL and no knitr input can be detected.
tc_count <- function(file = NULL, include_bib = TRUE, include_headers = FALSE) {
    if (is.null(file)) {
        if (requireNamespace("knitr", quietly = TRUE) && !is.null(knitr::current_input())) {
            file <- sub("\\.[Rr]md$", ".tex", knitr::current_input())
        } else {
            stop("No file specified and cannot detect current input file.")
        }
    }
    if (!file.exists(file)) {
        warning("TeX file not found: ", file)
        return(list(text = NA, headers = NA, outside = NA, total = NA,
                    total_formatted = "[compile twice for word count]", raw = NULL))
    }
    # Sys.which() is the portable way to probe for an executable; shelling out
    # to `which` fails outright on systems that do not provide that command.
    if (!nzchar(Sys.which("texcount"))) {
        warning("texcount not found")
        return(list(text = NA, headers = NA, outside = NA, total = NA,
                    total_formatted = "[texcount not installed]", raw = NULL))
    }
    tc_call <- if (include_bib) {
        "texcount -inc -incbib -total -sum"
    } else {
        "texcount -inc -total -sum"
    }
    cmd <- paste(tc_call, shQuote(file))
    tc_out <- system(cmd, intern = TRUE, ignore.stderr = TRUE)
    # First integer on the first output line matching `pattern`, or NA.
    extract_count <- function(pattern) {
        line <- grep(pattern, tc_out, value = TRUE)
        if (length(line) == 0) return(NA)
        as.numeric(sub(".*?(\\d+).*", "\\1", line[1]))
    }
    text_count <- extract_count("Words in text")
    header_count <- extract_count("Words in headers")
    outside_count <- extract_count("Words outside text")
    total <- sum(c(text_count, outside_count), na.rm = TRUE)
    if (include_headers) total <- sum(c(total, header_count), na.rm = TRUE)
    list(text = text_count, headers = header_count, outside = outside_count, total = total,
         total_formatted = format(total, big.mark = ",", scientific = FALSE), raw = tc_out)
}

# Convenience wrapper around tc_count(): returns only the `total_formatted`
# element of its result. Arguments are passed through unchanged.
tc_words <- function(file = NULL, include_bib = TRUE, include_headers = FALSE) {
    counts <- tc_count(
        file = file,
        include_bib = include_bib,
        include_headers = include_headers
    )
    counts$total_formatted
}

# =============================================================================
# End of local scholr functions
# =============================================================================
